## Warning: Missing column names filled in: 'X1' [1]

HIV diagnoses

# Linear model of HIV diagnosis counts on borough, gender, age group and
# median income. Rows for 2011 and the "All" age category are excluded.
# broom::tidy() extracts the coefficient table directly from the lm fit.
income_hiv %>%
  filter(year != "2011", age != "All") %>%
  lm(hiv_diagnoses ~ borough + gender + age + mid_income, data = .) %>%
  broom::tidy() %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) 0.983 0.302 3.252 0.001
boroughBrooklyn 0.297 0.281 1.060 0.289
boroughManhattan 3.091 0.331 9.332 0.000
boroughQueens -1.245 0.259 -4.811 0.000
boroughStaten Island -4.376 0.397 -11.016 0.000
genderMale 6.083 0.152 40.138 0.000
age20 - 29 9.600 0.262 36.576 0.000
age30 - 39 6.870 0.262 26.175 0.000
age40 - 49 4.627 0.262 17.627 0.000
age50 - 59 2.355 0.262 8.972 0.000
age60+ 0.427 0.262 1.626 0.104
mid_income 0.000 0.000 -17.851 0.000
# Linear model of HIV diagnosis counts on borough, gender, race and
# median income. Rows for 2011 and the "All" race category are excluded.
# broom::tidy() extracts the coefficient table directly from the lm fit.
income_hiv %>%
  filter(year != "2011", race != "All") %>%
  lm(hiv_diagnoses ~ borough + gender + race + mid_income, data = .) %>%
  broom::tidy() %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) 1.514 0.514 2.945 0.003
boroughBrooklyn 0.357 0.493 0.724 0.469
boroughManhattan 3.710 0.582 6.376 0.000
boroughQueens -1.494 0.454 -3.287 0.001
boroughStaten Island -5.251 0.698 -7.527 0.000
genderMale 7.299 0.266 27.425 0.000
raceBlack 10.932 0.421 25.978 0.000
raceLatino/Hispanic 9.027 0.421 21.451 0.000
raceOther/Unknown -1.380 0.421 -3.278 0.001
raceWhite 3.628 0.421 8.621 0.000
mid_income 0.000 0.000 -12.197 0.000
# Interactive scatter plot of mean HIV diagnoses against median income,
# one point per `uhf` x `year` group (2011 excluded), coloured by year,
# with a linear fit overlaid.
income_plot <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  # The y value is the mean over the rows of each group, so the column
  # (and therefore the axis label) is named mean_hiv rather than sum_hiv.
  summarise(
    mean_hiv = mean(hiv_diagnoses),
    mid_in = median(mid_income)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = mid_in, y = mean_hiv, color = year)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  # The documented value for hiding the legend is lowercase "none".
  theme(legend.position = "none")
ggplotly(income_plot)

Income distribution across neighborhoods

# Interactive dot plot of median income for every row of income_hiv,
# one horizontal strip per `uhf` value. Points are drawn semi-transparent
# so overlapping observations remain visible.
income_dist <- ggplot(income_hiv, aes(x = uhf, y = mid_income)) +
  geom_point(alpha = 0.1) +
  coord_flip() +
  theme_bw()
ggplotly(income_dist)

HIV diagnosis rate

# Linear model of the HIV diagnosis rate on borough, gender, age group and
# median income. Rows for 2011 and the "All" age category are excluded.
# broom::tidy() extracts the coefficient table directly from the lm fit.
income_hiv %>%
  filter(year != "2011", age != "All") %>%
  lm(hiv_diagnosis_rate ~ borough + gender + age + mid_income, data = .) %>%
  broom::tidy() %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) 17.851 1.511 11.811 0.000
boroughBrooklyn -12.078 1.403 -8.611 0.000
boroughManhattan 15.424 1.656 9.316 0.000
boroughQueens -22.620 1.293 -17.492 0.000
boroughStaten Island -30.524 1.985 -15.377 0.000
genderMale 39.822 0.757 52.582 0.000
age20 - 29 44.060 1.312 33.589 0.000
age30 - 39 33.215 1.312 25.322 0.000
age40 - 49 27.986 1.312 21.336 0.000
age50 - 59 13.841 1.312 10.552 0.000
age60+ -4.261 1.312 -3.249 0.001
mid_income -0.001 0.000 -18.326 0.000
# Linear model of the HIV diagnosis rate on borough, gender, race and
# median income. Rows for 2011 and the "All" race category are excluded.
# broom::tidy() extracts the coefficient table directly from the lm fit.
income_hiv %>%
  filter(year != "2011", race != "All") %>%
  lm(hiv_diagnosis_rate ~ borough + gender + race + mid_income, data = .) %>%
  broom::tidy() %>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) 0.795 2.459 0.323 0.747
boroughBrooklyn -11.951 2.357 -5.070 0.000
boroughManhattan 22.135 2.783 7.955 0.000
boroughQueens -25.218 2.173 -11.603 0.000
boroughStaten Island -31.251 3.336 -9.367 0.000
genderMale 49.480 1.273 38.875 0.000
raceBlack 61.810 2.012 30.713 0.000
raceLatino/Hispanic 34.404 2.012 17.095 0.000
raceOther/Unknown 9.809 2.012 4.874 0.000
raceWhite 12.984 2.012 6.452 0.000
mid_income 0.000 0.000 -1.729 0.084
# Interactive scatter plot of the summed HIV diagnosis rate against median
# income, one point per `uhf` x `year` group (2011 excluded), coloured by
# year, with a linear fit overlaid.
income_plot_diag_rate <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  # NOTE(review): this sums the rate over every row in the group, while
  # income_plot aggregates counts with mean(); rows whose age or race is
  # "All" are not filtered out here either. Confirm the intended aggregate.
  summarise(
    sum_hiv_diagnosis_rate = sum(hiv_diagnosis_rate),
    mid_in = median(mid_income)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = mid_in, y = sum_hiv_diagnosis_rate, color = year)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_bw() +
  # The documented value for hiding the legend is lowercase "none".
  theme(legend.position = "none")
ggplotly(income_plot_diag_rate)